/*

02_sample_descriptives.do

Purpose: create sample descriptives table
Inputs: student-level-panel
Outputs: descriptive-statistics, descriptive-statistics-note
	
*/

* Row definitions for the table panels. Each global holds a space-separated
* variable list; write_rows is called with the NAME of one of these globals.
global pretreatment           "female black hispanic white resident"
global markets                "prek3 prek4 k hs simulator_eligible simulator_app_eligible simulator"
global simulator_interactions "app_risk warnings_email email_received"

* Column definitions: one 0/1 indicator per table column, in column order.
global samples "all2020 simulator2020 warnings sim_only control2019"

* Risk measures and the statistics derived from them.
* NOTE(review): neither of these two globals is referenced in the visible
* part of this file - confirm whether another script relies on them.
global risk_types "risk_real_initial risk_sim_initial risk_real_final"
global risk_stats "p1 p50 p99 mean"

* Load the student-level panel.
use "$int/student-level-panel", clear

* Sample indicators, one per table column. Each is 1 when the condition
* holds and 0 otherwise (a missing input yields 0, not missing).
gen all2020       = (year == 2020)
* NOTE(review): simulator2020 carries no explicit year restriction and is
* built from simulator BEFORE simulator is overwritten for 2019 rows below,
* so this statement order matters - confirm simulator is never 1 for 2019
* rows in the input data.
gen simulator2020 = (simulator == 1)
gen control2019   = (year == 2019) & (simulator_app_eligible == 1)
gen warnings      = (treat == 3)
gen sim_only      = (treat == 1)

* In 2019, every application by construction only contains schools that
* existed in 2019, so for that year simulator is set equal to the
* simulator_app_eligible indicator.
replace simulator = (simulator_app_eligible == 1) if year == 2019

* Where the simulated initial risk is missing, fall back to app_risk.
replace risk_sim_initial = app_risk if missing(risk_sim_initial)

* Summary variables for each risk measure risk_<source>_<stage>
* (source: sim, real; stage: initial, final):
*   <risk>_p50  = 1 if risk > .5,  else 0
*   <risk>_p1   = 1 if risk < .01, else 0
*   <risk>_p99  = 1 if risk > .99, else 0
*   <risk>_mean = the risk value itself
* All four are left missing when the risk is missing; the guard is required
* because Stata treats a missing value as larger than any number.
foreach source in sim real {
	foreach stage in initial final {

		local risk risk_`source'_`stage'

		gen `risk'_p50  = (`risk' > .5)  if !missing(`risk')
		gen `risk'_p1   = (`risk' < .01) if !missing(`risk')
		gen `risk'_p99  = (`risk' > .99) if !missing(`risk')
		gen `risk'_mean = `risk'         if !missing(`risk')
	}
}

* Label every variable whose name ends in mean, p1, p50 or p99 with the
* (LaTeX) text used for its table row.
foreach suffix in mean p1 p50 p99 {
	foreach v of varlist *`suffix' {
		if "`suffix'" == "mean" {
			label variable `v' "Mean"
		}
		else if "`suffix'" == "p1" {
			label variable `v' "$< 1\%$"
		}
		else if "`suffix'" == "p50" {
			label variable `v' "$> 50\%$"
		}
		else {
			label variable `v' "$> 99\%$"
		}
	}
}

* Row labels for the table. The _ever and _final version of each choice
* outcome share the same label.
foreach when in ever final {
	label variable change_school_`when' "Change school"
	label variable lengthen_app_`when'  "Lengthen app."
	label variable new_insert_`when'    "Insert new school"
	label variable new_append_`when'    "Append new school"
	label variable shorten_app_`when'   "Shorten app."
	label variable modify_`when'        "Change length or school"
}

* Grade and eligibility rows; the "$+$" labels read as cumulative
* restrictions on top of the row above.
label variable hs                     "Grade 9"
label variable simulator_eligible     "Apply to eligible grade"
label variable simulator_app_eligible "$+$ in time"
label variable simulator              "$+$ only to simulator schools"

* ---- Descriptive statistics table: preamble and column headers ----

* Close handle f if it is already open (capture ignores the error when it
* is not), then start the output file from scratch.
capture file close f
file open f using "$tables/descriptive-statistics.tex", write replace

* One label column followed by five centred columns, one per sample.
file write f "\begin{tabular}{l*{5}{c}}" _n ///
	"\toprule \\[-1.0em]" _n

* Year panels: columns 2-5 are 2020 samples, column 6 is the 2019 sample.
file write f "& \multicolumn{4}{c}{2020} & " ///
	" \multicolumn{1}{c}{2019} \\" _n ///
	"\cmidrule(lr){2-5} \cmidrule(lr){6-6} \\[-1.0em]" _n

* Column titles, in the same order as the samples global.
file write f " & All grades & Eligible & Warnings & Simulator & Comparison group\\" _n ///
	"\midrule \\[-1.0em]" _n

* write_rows: standard procedure for filling the rows of one table panel.
*
* Argument
*   1 : the NAME (not the contents) of a global macro holding a
*       space-separated variable list, e.g.  write_rows "pretreatment"
*
* For every variable in that list, writes one LaTeX table row to file
* handle f: the variable label, then one cell per sample in the samples
* global containing the mean of the variable within that sample (%4.3fc),
* then the row terminator.
*
* Requires: file handle f open for writing, the samples global defined,
* and a 0/1 indicator variable for each sample name.
cap program drop write_rows
program define write_rows
	args listname

	foreach row in ${`listname'} {

		* Row header: the variable label of the row variable
		file write f "`:var la `row''"

		foreach samp in $samples {

			// Don't write stats on second stage outcomes for all grades col
			if "`samp'" == "all2020" & ///
				inlist("`listname'","outcomes_sec_stage","outcomes_sec_stage_final") {
				file write f "&"
			}
			else {
				qui su `row' if `samp' == 1

				// summarize leaves r(mean) unset when the sample has no
				// non-missing observations; writing it unguarded would
				// abort the whole table, so emit an empty cell instead.
				if r(N) > 0 {
					file write f "&" %4.3fc (r(mean))
				}
				else {
					file write f "&"
				}
			}
		} // samp

		file write f "\\ "_n
	} // row

end

* Table body. Each panel is a heading row, the rows produced by write_rows
* for one global variable list, and an empty spacer row.

* I. Demographics
file write f "\emph{I. Demographics} \\"_n
write_rows "pretreatment"
file write f "\\"_n

* II. Grades and simulator eligibility/sample status
* (heading spelling corrected: "Eligibility")
file write f "\emph{II. Simulator Eligibility} \\"_n
write_rows "markets"
file write f "\\"_n

* III. Interaction with simulator
file write f "\emph{III. Interactions with simulator} \\"_n
write_rows "simulator_interactions"
file write f "\\"_n

* IV. Placements
* NOTE(review): the globals outcomes_placement and outcomes_sec_stage are
* not defined in the visible part of this file. If they are undefined at
* run time, write_rows loops over an empty list and panels IV and V are
* silently left without rows - confirm a calling script sets them.
file write f "\emph{IV. Placements} \\"_n
write_rows "outcomes_placement"
file write f "\\"_n

* V. App usage outcomes (write_rows leaves the all-grades column empty
* for this list)
file write f "\emph{V. Choice outcomes} \\"_n
write_rows "outcomes_sec_stage"
file write f "\\"_n

* Final row: number of observations in each column sample.
file write f "\hline \\[-1.0em]" _n ///
	"\emph{N}  "

foreach col in $samples {
	quietly count if `col' == 1
	file write f "&" %4.0fc (`r(N)')
}

* Terminate the N row, close the tabular environment and release the file.
file write f "\\" _n ///
	"\bottomrule" _n ///
	"\end{tabular}" _n
file close f

* Table note: written to its own .tex file as a \floatfoot block that
* describes which sample each column of the table contains.

* Close handle f if it is still open (capture ignores the error otherwise).
cap file close f

file open f using "$notes/descriptive-statistics-note.tex", write replace

file write f "\floatfoot{\footnotesize\textit{Notes.} Samples vary by column. The first column consists of "
file write f "all applicants in 2020. The second column consists of the sample that was eligible for treatment in 2020."
file write f " Columns 3 and 4 represent the subsamples that have been assigned to either treatment group."
* Wording fix in the emitted note: "had there been any" (was "their").
file write f " The fifth column consists of those applicants in 2019 that would have been eligible for treatment had there been any. "_n
file write f " Statistics reported represent shares of applicants in the respective sample or the mean difference in the last two rows of panel V.}"

file close f